swev-id: scikit-learn__scikit-learn-13135 - Sort k-means bin edges #24
swev-id: scikit-learn__scikit-learn-13135 - Sort k-means bin edges #24 casey-brooks wants to merge 1 commit into scikit-learn__scikit-learn-13135 from
Conversation
Local Validation
site_packages = '/root/.local/lib/python3.11/site-packages' sys.path.insert(0, site_packages) import sklearn sys.modules.pop('sklearn.preprocessing._discretization', None) spec = importlib.util.spec_from_file_location( importlib.reload(sklearn.preprocessing) sys.modules.pop('sklearn.utils._unittest_backport', None) sys.modules.pop('sklearn.utils.testing', None) from pytest import main with tempfile.TemporaryDirectory() as tmpdir:
|
noa-lucent
left a comment
There was a problem hiding this comment.
Fix sorts k-means centers before constructing bin edges, ensuring monotonic bins. Added regression coverage for the failing case and multi-feature monotonicity; tests look solid.
Issue
Fixes #23
Reproduction
Observed failure (pre-fix):
Fix
Build `bin_edges_` from `[col_min, interior midpoints, col_max]` to guarantee monotonic edges for `strategy='kmeans'`
Tests
# Local validation harness: load selected modules from the repository checkout
# over the already-imported scikit-learn package, then run the k-means
# discretization tests with pytest and exit with pytest's status code.
import numpy as np
# Recreate the ``np.int`` alias before scikit-learn is imported.
# NOTE(review): presumably needed because this scikit-learn revision still
# references ``np.int`` while the installed NumPy no longer provides it --
# confirm against the NumPy version in use.
np.int = int
import importlib.util
import importlib
import sys
import tempfile
from pathlib import Path
site_packages = '/root/.local/lib/python3.11/site-packages'
repo_root = '/workspace/scikit-learn'
# Put the user site-packages directory first on the import path so the
# ``import sklearn`` below searches it before any other sys.path entry.
sys.path.insert(0, site_packages)
import sklearn
import sklearn.preprocessing
# Replace the discretization module with the version from the repository
# checkout: drop any cached entry, build a module from the file on disk,
# register it under the original dotted name, then execute its code.
sys.modules.pop('sklearn.preprocessing._discretization', None)
spec = importlib.util.spec_from_file_location(
    'sklearn.preprocessing._discretization',
    f'{repo_root}/sklearn/preprocessing/_discretization.py')
module = importlib.util.module_from_spec(spec)
sys.modules['sklearn.preprocessing._discretization'] = module
spec.loader.exec_module(module)
# Re-execute the package so it is evaluated again with the replacement module
# registered (assumes the package imports from ``._discretization`` -- verify).
importlib.reload(sklearn.preprocessing)
# Same replacement procedure for the unittest backport helper module.
sys.modules.pop('sklearn.utils._unittest_backport', None)
unittest_spec = importlib.util.spec_from_file_location(
    'sklearn.utils._unittest_backport',
    f'{repo_root}/sklearn/utils/_unittest_backport.py')
unittest_module = importlib.util.module_from_spec(unittest_spec)
sys.modules['sklearn.utils._unittest_backport'] = unittest_module
unittest_spec.loader.exec_module(unittest_module)
# Same replacement procedure for the testing utilities module; it is loaded
# after the backport module above, which is already registered at this point.
sys.modules.pop('sklearn.utils.testing', None)
testing_spec = importlib.util.spec_from_file_location(
    'sklearn.utils.testing',
    f'{repo_root}/sklearn/utils/testing.py')
testing_module = importlib.util.module_from_spec(testing_spec)
sys.modules['sklearn.utils.testing'] = testing_module
testing_spec.loader.exec_module(testing_module)
from pytest import main
# Everything below must run inside the ``with`` block: the copied test file
# lives in the temporary directory, which is removed when the block exits.
with tempfile.TemporaryDirectory() as tmpdir:
    # Copy the repository's test file into the temporary directory and run
    # the copy. NOTE(review): presumably done so pytest collects the file
    # outside the repository tree -- confirm.
    test_path = Path(tmpdir) / 'test_discretization_local.py'
    original = Path(repo_root) / 'sklearn' / 'preprocessing' / 'tests' / 'test_discretization.py'
    test_path.write_text(original.read_text(encoding='utf-8'), encoding='utf-8')
    # ``-q`` keeps the output short; ``-k kmeans`` selects only tests whose
    # names match "kmeans". The process exits with pytest's return code.
    sys.exit(main(['-q', str(test_path), '-k', 'kmeans']))
PY`
PATH=/root/.local/bin:$PATH flake8 sklearn/preprocessing/_discretization.py sklearn/preprocessing/tests/test_discretization.py